TODO: Synopsis

Data Processing

The data is loaded with the read.table command. I also tried the fread command of the data.table package, but unfortunately this threw an error. As the dataset, at around 600 MB, is manageable on a standard PC, I decided not to investigate this problem any further. The packages I include in my processing are dplyr, stringr and ggmap. dplyr is really powerful for data management and processing, so I first converted the data to dplyr's local data frame type. In the next step I reduce the data to the variables that are relevant from my point of view. These are the date on which an event occurred, the event type, and the damage it caused, expressed in the variables fatalities, injuries, property damage and crop damage. Furthermore, I include the longitude and latitude information to locate the events and show their magnitude.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(ggmap)
## Loading required package: ggplot2
# ---- Load and prepare the storm data ----
# NOTE(review): setwd() with an absolute path ties this document to a single
# machine. It is kept so the relative file names below keep resolving as
# before; consider removing it and knitting from the project directory.
setwd("/home/me/Github/Courseproject2_RepR/")

# Use the cached workspace when it is available; otherwise rebuild
# `data_storm` from the raw CSV so the analysis can start from the original
# data instead of failing on a missing cache file.
if (file.exists("stormdata.RData")) {
  # Presumably restores an object named `data_storm` -- verified just below.
  load("stormdata.RData")
} else {
  data_storm <- read.table("repdata_data_StormData.csv", sep = ",", header = TRUE)
}
stopifnot(exists("data_storm"))

# Keep only the columns used later: event date and type, the harm counts,
# the damage figures and the coordinates.
data_df <- select(
  data_storm,
  BGN_DATE, EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG, LATITUDE, LONGITUDE
)
# NOTE(review): tbl_df() is deprecated in current dplyr releases in favour of
# as_tibble(); left unchanged to stay compatible with the dplyr version this
# report was written against.
data_df <- tbl_df(data_df)
# BGN_DATE may have been read as a factor; str_extract() needs plain text.
data_df$BGN_DATE <- as.character(data_df$BGN_DATE)

# YEAR is the first run of four digits found in BGN_DATE (kept as character).
# Both coordinates are divided by 100 and the sign of LONGITUDE is flipped --
# presumably the raw values are degrees * 100 with west-positive longitude;
# confirm against the data documentation.
data_df_MUT <- mutate(
  data_df,
  YEAR = str_extract(BGN_DATE, "\\d{4}"),
  LONGITUDE = -LONGITUDE / 100,
  LATITUDE = LATITUDE / 100
)
data_df_MUT <- select(data_df_MUT, -BGN_DATE)

# Final analysis table: YEAR first, then EVTYPE through LONGITUDE.
data_df_analysis <- select(data_df_MUT, YEAR, EVTYPE:LONGITUDE)

There should be a section titled Data Processing which describes (in words and code) how the data were loaded into R and processed for analysis. In particular, your analysis must start from the raw CSV file containing the data. You cannot do any preprocessing outside the document. If preprocessing is time-consuming you may consider using the cache = TRUE option for certain code chunks.

Results

# Number of highest-ranked events drawn on each map.
top_param <- 20

# Question 1: rank all events by combined fatalities and injuries, worst first,
# and print the head of the ranking.
data_question1 <- data_df_analysis %>%
  arrange(desc(FATALITIES + INJURIES))
data_question1
## Source: local data frame [902,297 x 8]
## 
##    YEAR            EVTYPE FATALITIES INJURIES PROPDMG CROPDMG LATITUDE
## 1  1979           TORNADO         42     1700  250.00    0.00    33.50
## 2  1994         ICE STORM          1     1568   50.00    5.00     0.00
## 3  1953           TORNADO         90     1228  250.00    0.00    42.28
## 4  2011           TORNADO        158     1150    2.80    0.00    37.03
## 5  1974           TORNADO         36     1150  250.00    0.00    39.38
## 6  1953           TORNADO        116      785   25.00    0.00    43.06
## 7  2011           TORNADO         44      800    1.50    0.00    33.03
## 8  1998             FLOOD          2      800   50.00   50.00     0.00
## 9  2004 HURRICANE/TYPHOON          7      780    5.42  285.00     0.00
## 10 1998             FLOOD          0      750  268.00    1.55     0.00
## ..  ...               ...        ...      ...     ...     ...      ...
## Variables not shown: LONGITUDE (dbl)
# Only events with both coordinates non-zero are plotted; rows with a zero
# coordinate are dropped here (the ranking above contains rows whose LATITUDE
# is 0.00).
data_question1_plot <- filter(data_question1, LONGITUDE != 0, LATITUDE != 0)

# head() rather than [1:top_param, ]: it returns at most the rows that exist,
# so a short table is not padded with all-NA rows and top_param = 0 yields an
# empty table instead of indexing with c(1, 0).
data_question1_plot_top <- head(data_question1_plot, top_param)

# Base map for the casualty plot.
map_quest1 <- get_map(location = "USA", zoom = 4)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=USA&zoom=4&size=%20640x640&scale=%202&maptype=terrain&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=USA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
# Map of the top casualty events: one semi-transparent point per event, with
# point area scaled by fatalities plus injuries.
size_breaks_q1 <- c(1, 500, 1000, 1500, 2000)

mapPoints_question1 <- ggmap(map_quest1) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, size = FATALITIES + INJURIES),
    data = data_question1_plot_top,
    alpha = 0.5
  ) +
  scale_size_area(
    breaks = size_breaks_q1,
    labels = size_breaks_q1,
    name = "Fatalities and Injuries"
  )
mapPoints_question1

# Question 2: rank all events by combined property and crop damage, largest
# first, and print the head of the ranking.
# NOTE(review): PROPDMG and CROPDMG are summed as raw numbers; if the source
# data carries separate magnitude columns for these figures, they are not
# applied here -- confirm against the data documentation.
data_question2 <- data_df_analysis %>%
  arrange(desc(PROPDMG + CROPDMG))
data_question2
## Source: local data frame [902,297 x 8]
## 
##    YEAR            EVTYPE FATALITIES INJURIES PROPDMG CROPDMG LATITUDE
## 1  2009 THUNDERSTORM WIND          0        0    5000       0    36.04
## 2  2010       FLASH FLOOD          0        0    5000       0    41.19
## 3  2010       FLASH FLOOD          0        0    5000       0    41.27
## 4  2011        WATERSPOUT          0        0    5000       0    27.06
## 5  2009         LANDSLIDE          0        0    4800       0     0.00
## 6  2009           TORNADO          0        0    4410       0    35.51
## 7  2009 THUNDERSTORM WIND          0        0    3500       0    35.21
## 8  2010 THUNDERSTORM WIND          0       25    3200       0    41.11
## 9  2009         HIGH WIND          0        0    3000       0     0.00
## 10 2009             FLOOD          0        0    3000       0    39.51
## ..  ...               ...        ...      ...     ...     ...      ...
## Variables not shown: LONGITUDE (dbl)
# Only events with both coordinates non-zero are plotted; rows with a zero
# coordinate are dropped here (the ranking above contains rows whose LATITUDE
# is 0.00).
data_question2_plot <- filter(data_question2, LONGITUDE != 0, LATITUDE != 0)

# head() rather than [1:top_param, ]: it returns at most the rows that exist,
# so a short table is not padded with all-NA rows and top_param = 0 yields an
# empty table instead of indexing with c(1, 0).
data_question2_plot_top <- head(data_question2_plot, top_param)

# Base map for the damage plot.
map_quest2 <- get_map(location = "USA", zoom = 4)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=USA&zoom=4&size=%20640x640&scale=%202&maptype=terrain&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=USA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
# Map of the top damage events: one semi-transparent point per event, with
# point area scaled by property plus crop damage, and one panel row per
# event type.
size_breaks_q2 <- c(1000, 2000, 3000, 4000, 5000)

mapPoints_question2 <- ggmap(map_quest2) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, size = PROPDMG + CROPDMG),
    data = data_question2_plot_top,
    alpha = 0.5
  ) +
  scale_size_area(
    breaks = size_breaks_q2,
    labels = size_breaks_q2,
    name = "Property and crop damage"
  ) +
  facet_grid(EVTYPE ~ ., shrink = FALSE, space = "free")
mapPoints_question2